from bs4 import BeautifulSoup
import requests, json
from book import *

# Request headers: session cookies plus a desktop Firefox User-Agent so the
# target site serves its normal pages to the scraper.
headers = {'Cookie': 'Hm_lvt_acfbfe93830e0272a88e1cc73d4d6d0f=1668230023,1668241009; Hm_lvt_d72896ddbf8d27c750e3b365ea2fc902=1668230023,1668241009; Hm_lpvt_d72896ddbf8d27c750e3b365ea2fc902=1668249547; Hm_lpvt_acfbfe93830e0272a88e1cc73d4d6d0f=1668241029; __vtins__1xpAUPUjtatG3hli=%7B%22sid%22%3A%20%2244354f9f-c0bc-51c1-b37f-1bcf85693799%22%2C%20%22vd%22%3A%201%2C%20%22stt%22%3A%200%2C%20%22dr%22%3A%200%2C%20%22expires%22%3A%201668251338243%2C%20%22ct%22%3A%201668249538243%7D; __51uvsct__1xpAUPUjtatG3hli=1; __51vcke__1xpAUPUjtatG3hli=bdf61955-9f68-5f95-840e-ae0bc16e455a; __51vuft__1xpAUPUjtatG3hli=1668249538245', 'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:106.0) Gecko/20100101 Firefox/106.0'}
# booklist.json holds the list of book index-page URLs to scrape.
# JSON files are UTF-8 by spec, so read with an explicit encoding instead of
# relying on the platform default (which breaks on e.g. Windows/GBK locales).
with open('booklist.json', 'r', encoding='utf-8') as f:
  booklist = json.load(f)

# Scrape each book: fetch its index page, pull title/author/cover metadata,
# walk the chapter list, download every chapter body, and write an EPUB.
# enumerate() replaces the manual counter, which shadowed the builtin `id`;
# the loop variable is named book_url so it is not clobbered by the Book()
# object below (the original reused the name `book` for both).
for book_id, book_url in enumerate(booklist, start=1):
  print('正在获取书籍 %s 首页'%book_id)
  req = requests.get(book_url, headers = headers)
  req.raise_for_status()  # fail fast on HTTP errors instead of parsing an error page
  req.encoding = 'gbk'    # the site serves GBK-encoded pages
  soup = BeautifulSoup(req.text, 'lxml')
  book = Book()
  # Set book metadata scraped from the index page.
  book.setTitle(soup.select('#content span b')[0].text)
  # The author cell reads like "作者：<name>" — keep the part after the
  # full-width colon.
  book.setAuthor(soup.select('#content div')[0].select('tr')[2].select('td')[1].text.split('：')[1])
  book.setCover(soup.select('#content div')[0].select('img')[0].attrs['src'])
  # Jump to the book's table of contents.
  categoryAddr = soup.select('#content div')[0].select('fieldset a')[0].attrs['href']
  # Chapter links are relative to the TOC page's directory.
  baseAddr = '/'.join(categoryAddr.split('/')[:-1]) + '/'
  print('正在获取书籍 %s 目录'%book_id)
  req = requests.get(categoryAddr, headers = headers)
  req.raise_for_status()
  req.encoding = 'gbk'
  soup = BeautifulSoup(req.text, 'lxml')
  articles = soup.select('.ccss a')
  # Fetch the body text of every chapter listed in the TOC.
  for articleAddr in articles:
    print('正在获取书籍 %s 目录 %s 的正文'%(book_id, articleAddr.text))
    req = requests.get(baseAddr + articleAddr.attrs['href'], headers = headers)
    req.raise_for_status()
    req.encoding = 'gbk'
    soup = BeautifulSoup(req.text, 'lxml')
    book.appendText(soup.select('#title')[0].text, soup.select('#content')[0].prettify())
  print('书籍 %s 正在写入文件'%book_id)
  toEpub(book, book.title + '.epub')